Set Options

# Document-wide chunk defaults for the rendered codebook.
knitr::opts_chunk$set(
  echo = TRUE,    # include the R code of each chunk in the output
  error = TRUE,   # keep generating the codebook when a chunk errors;
                  # usually easier to debug than an aborted run
  message = TRUE, # keep messages raised during codebook generation
  warning = TRUE  # keep warnings raised during codebook generation
)

# Draw every ggplot2 figure with the black-and-white theme.
ggplot2::theme_set(ggplot2::theme_bw())

library(rio)
library(labelled)

Prep Data

library(codebook)
## 
## Attaching package: 'codebook'
## The following object is masked from 'package:labelled':
## 
##     to_factor
# Read the item-level summary CSV that the codebook describes.
codebook_data <- rio::import(
  "../data_processing/output_data/item_data/sr_item_data.csv"
)

# Attach a human-readable label to each column, keyed by column name.
# codebook() prints these labels verbatim as the variable descriptions and
# copies them into the JSON-LD metadata, so wording fixes here propagate to
# every part of the rendered codebook.
var_label(codebook_data) <- list(
  # Stimulus identity and trial type
  word = "Original stimulus shown to the participant in the language of the study.",
  class = "Word/nonword trial type indicator, which tells you what the correct answer is for the trial.",
  # Statistics over all data points
  avgRT = "Average response latency for the stimulus item across all participants.",
  avgZ_RT = "Average z-scored response latency for the stimulus item - z-scored by participant session, then averaged across the item, regardless of condition.",
  samplesize = "The number of participants who were shown that item.",
  n_answered = "The number of participants who answered that item (i.e., it did not time out).",
  seRT = "The standard error of the raw response latency.",
  seZ_RT = "The standard error of the z-scored response latency.",
  accuracy = "The proportion of correct answers out of total answered for that item.",
  # Same statistics after excluding outliers of 2.5 Z or higher
  Z2.5_avgRT = "The average raw response latency excluding outliers of 2.5 Z or higher.",
  Z2.5_avgZ_RT = "The average Z-scored response latency excluding outliers of 2.5 Z or higher.",
  Z2.5_samplesize = "The number of data points viewed that were not Z equal 2.5 or higher.",
  Z2.5_n_answered = "The number of data points viewed that were not Z equal 2.5 or higher. Note that viewed and answered are the same because you had to answer the item (correctly) for it to be included in the Z-score calculation.",
  Z2.5_seRT = "Standard error of the raw response latency after removing Z equal 2.5 or higher.",
  Z2.5_seZ_RT = "Standard error of the Z-scored response latency after removing Z equal 2.5 or higher.",
  Z2.5_accuracy = "The accuracy for Z scored response latencies after excluding Z of 2.5 or higher. Note that you had to get the item right for it to be included in the Z-score calculation, so this value is always 1.",
  # Same statistics after excluding outliers of 3.0 Z or higher
  Z3.0_avgRT = "The average raw response latency excluding outliers of 3.0 Z or higher.",
  Z3.0_avgZ_RT = "The average Z-scored response latency excluding outliers of 3.0 Z or higher.",
  Z3.0_samplesize = "The number of data points viewed that were not Z equal 3.0 or higher.",
  Z3.0_n_answered = "The number of data points viewed that were not Z equal 3.0 or higher. Note that viewed and answered are the same because you had to answer the item (correctly) for it to be included in the Z-score calculation.",
  Z3.0_seRT = "Standard error of the raw response latency after removing Z equal 3.0 or higher.",
  Z3.0_seZ_RT = "Standard error of the Z-scored response latency after removing Z equal 3.0 or higher.",
  Z3.0_accuracy = "The accuracy for Z scored response latencies after excluding Z of 3.0 or higher. Note that you had to get the item right for it to be included in the Z-score calculation, so this value is always 1."
)

# Dataset-level metadata. codebook() reproduces these fields in the
# "JSON-LD metadata" section of the rendered document.
metadata(codebook_data)$name <-
  "Semantic Priming Across Many Languages (Example using Serbian data)"

# Two paragraphs separated by a blank line. The pieces are joined with
# paste0() so the paragraph break (and the space that precedes it) is visible
# in the source rather than depending on whitespace inside a multi-line literal.
metadata(codebook_data)$description <- paste0(
  "This dataset includes the summarized item data from the SPAML project (example is specifically Serbian, but all files are structured the same way). The data is averaged over items, regardless of condition for words (i.e., related or unrelated trial). ",
  "\n\n",
  "Semantic priming has been studied for nearly 50 years across various experimental manipulations and theoretical frameworks. These studies provide insight into the cognitive underpinnings of semantic representations in both healthy and clinical populations; however, they have suffered from several issues including generally low sample sizes and a lack of diversity in linguistic implementations. Here, we will test the size and the variability of the semantic priming effect across ten languages by creating a large database of semantic priming values, based on an adaptive sampling procedure. Differences in response latencies between related word-pair conditions and unrelated word-pair conditions (i.e., difference score confidence interval is greater than zero) will allow quantifying evidence for semantic priming, whereas improvements in model fit with the addition of a random intercept for language will provide support for variability in semantic priming across languages."
)

# Identification and attribution
metadata(codebook_data)$identifier <-
  "https://doi.org/10.5281/zenodo.10888833"
metadata(codebook_data)$creator <- "Erin M. Buchanan"
metadata(codebook_data)$citation <-
  "Buchanan, E., Cuccolo, K., Heyman, T., Iyer, A., Coles, N., Lewis Jr, N., Peters, K., van Berkel, N., Taylor, J., Van't Veer, A. E., Montefinese, M., Valentine, K. D., Maxwell, N., Türkan, B. N., Williams, G., Oliveros-Chacana, J. C., Röer, J., Fini, C., Acar, O., … Lewis, S. C. (2024). SemanticPriming/SPAML: SPAML v1 Data Release (v1.0.0) [Data set]. Zenodo. https://doi.org/10.5281/zenodo.10888833"
metadata(codebook_data)$url <-
  "https://github.com/SemanticPriming/SPAML/releases/"

# Publication date and coverage
metadata(codebook_data)$datePublished <- "2024-05-01"
metadata(codebook_data)$temporalCoverage <- "2022-2024"
metadata(codebook_data)$spatialCoverage <- "Online"

Create codebook

# Render the codebook for the labelled, metadata-annotated data frame.
codebook::codebook(codebook_data)

Metadata

Description

Dataset name: Semantic Priming Across Many Languages (Example using Serbian data)

This dataset includes the summarized item data from the SPAML project (example is specifically Serbian, but all files are structured the same way). The data is averaged over items, regardless of condition for words (i.e., related or unrelated trial).

Semantic priming has been studied for nearly 50 years across various experimental manipulations and theoretical frameworks. These studies provide insight into the cognitive underpinnings of semantic representations in both healthy and clinical populations; however, they have suffered from several issues including generally low sample sizes and a lack of diversity in linguistic implementations. Here, we will test the size and the variability of the semantic priming effect across ten languages by creating a large database of semantic priming values, based on an adaptive sampling procedure. Differences in response latencies between related word-pair conditions and unrelated word-pair conditions (i.e., difference score confidence interval is greater than zero) will allow quantifying evidence for semantic priming, whereas improvements in model fit with the addition of a random intercept for language will provide support for variability in semantic priming across languages.

Metadata for search engines
name value
1 Erin M. Buchanan
x
word
class
avgRT
avgZ_RT
samplesize
n_answered
seRT
seZ_RT
accuracy
Z2.5_avgRT
Z2.5_avgZ_RT
Z2.5_samplesize
Z2.5_n_answered
Z2.5_seRT
Z2.5_seZ_RT
Z2.5_accuracy
Z3.0_avgRT
Z3.0_avgZ_RT
Z3.0_samplesize
Z3.0_n_answered
Z3.0_seRT
Z3.0_seZ_RT
Z3.0_accuracy

Variables

word

Original stimulus shown to the participant in the language of the study.

Distribution

Distribution of values for word

Distribution of values for word

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
word Original stimulus showed to the participant in the language of the study. character 0 1 3917 0 2 20 0

class

Word/nonword trial type indicator, which tells you what the correct answer is for the trial.

Distribution

Distribution of values for class

Distribution of values for class

0 missing values.

Summary statistics

name label data_type n_missing complete_rate n_unique empty min max whitespace
class Word/nonword trial type indicator, which tells you what the correct answer is for the trial. character 0 1 2 0 4 7 0

avgRT

Average response latency for the stimulus item across all participants.

Distribution

Distribution of values for avgRT

Distribution of values for avgRT

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
avgRT Average response latency for the stimulus item across all participants. numeric 0 1 383 854 1756 892.9244 194.4628 ▁▇▃▁▁

avgZ_RT

Average z-scored response latency for the stimulus item - z-scored by participant session, then averaged across the item, regardless of condition.

Distribution

Distribution of values for avgZ_RT

Distribution of values for avgZ_RT

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
avgZ_RT Average z-scored response latency for the stimulus item - z-scored by participant session, then averaged across the item, regardless of condition. numeric 0 1 -1.4 -0.083 2.8 0.0194038 0.5490782 ▂▇▃▁▁

samplesize

The number of participants who were shown that item.

Distribution

Distribution of values for samplesize

Distribution of values for samplesize

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
samplesize The number of participants who were shown that item. numeric 0 1 1 124 366 125.7955 24.74073 ▁▇▁▁▁

n_answered

The number of participants who answered that item (i.e., it did not time out).

Distribution

Distribution of values for n_answered

Distribution of values for n_answered

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
n_answered The number of participants who answered that item (i.e., it did not time out. numeric 0 1 72 130 412 132.8486 23.89708 ▇▂▁▁▁

seRT

The standard error of the raw response latency.

Distribution

Distribution of values for seRT

Distribution of values for seRT

1 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
seRT The standard error of the raw response latency. numeric 1 0.9997447 0.84 30 143 31.91637 10.79077 ▇▇▁▁▁

seZ_RT

The standard error of the z-scored response latency.

Distribution

Distribution of values for seZ_RT

Distribution of values for seZ_RT

1 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
seZ_RT The standard error of the z-scored response latency. numeric 1 0.9997447 0.03 0.07 0.47 0.0747394 0.0258709 ▇▁▁▁▁

accuracy

The proportion of correct answers out of total answered for that item.

Distribution

Distribution of values for accuracy

Distribution of values for accuracy

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
accuracy The proportion of correct answers out of total answered for that item. numeric 0 1 0.0092 0.97 1 0.9470538 0.0771837 ▁▁▁▁▇

Z2.5_avgRT

The average raw response latency excluding outliers of 2.5 Z or higher.

Distribution

Distribution of values for Z2.5_avgRT

Distribution of values for Z2.5_avgRT

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
Z2.5_avgRT The average raw response latency excluding outliers of 2.5 Z or higher. numeric 0 1 383 838 1391 863.0617 164.703 ▁▇▇▃▁

Z2.5_avgZ_RT

The average Z-scored response latency excluding outliers of 2.5 Z or higher.

Distribution

Distribution of values for Z2.5_avgZ_RT

Distribution of values for Z2.5_avgZ_RT

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
Z2.5_avgZ_RT The average Z-scored response latency excluding outliers of 2.5 Z or higher. numeric 0 1 -1.4 -0.13 1.4 -0.0748136 0.4520479 ▁▇▇▃▁

Z2.5_samplesize

The number of data points viewed that were not Z equal 2.5 or higher

Distribution

Distribution of values for Z2.5_samplesize

Distribution of values for Z2.5_samplesize

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
Z2.5_samplesize The number of data points viewed that were not Z equal 2.5 or higher numeric 0 1 1 121 363 122.1008 25.37061 ▁▇▁▁▁

Z2.5_n_answered

The number of data points viewed that were not Z equal 2.5 or higher. Note that viewed and answered are the same because you had to answer the item (correctly) for it to be included in the Z-score calculation.

Distribution

Distribution of values for Z2.5_n_answered

Distribution of values for Z2.5_n_answered

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
Z2.5_n_answered The number of data points viewed that were not Z equal 2.5 or higher. Note that viewed and answered are the same because you had to answer the item (correctly) for it be to included in the Z-score calculation. numeric 0 1 1 121 363 122.1008 25.37061 ▁▇▁▁▁

Z2.5_seRT

Standard error of the raw response latency after removing Z equal 2.5 or higher.

Distribution

Distribution of values for Z2.5_seRT

Distribution of values for Z2.5_seRT

1 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
Z2.5_seRT Standard error of the raw response latency after removing Z equal 2.5 or higher. numeric 1 0.9997447 0.84 27 143 28.97971 9.645963 ▇▆▁▁▁

Z2.5_seZ_RT

Standard error of the Z-scored response latency after removing Z equal 2.5 or higher.

Distribution

Distribution of values for Z2.5_seZ_RT

Distribution of values for Z2.5_seZ_RT

1 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
Z2.5_seZ_RT Standard error of the Z-scored response latency after removing Z equal 2.5 or higher. numeric 1 0.9997447 0.028 0.058 0.29 0.0607403 0.0161149 ▇▁▁▁▁

Z2.5_accuracy

The accuracy for Z scored response latencies after excluding Z of 2.5 or higher. Note that you had to get the item right for it to be included in the Z-score calculation, so this value is always 1.

Distribution

Distribution of values for Z2.5_accuracy

Distribution of values for Z2.5_accuracy

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
Z2.5_accuracy The accuracy for Z scored response latencies after excluding Z greater than 2.5 and higher. Note that you had to get the item right for it to be included in the Z-score calculation, so this value is always 1. numeric 0 1 1 1 1 1 0 ▁▁▇▁▁

Z3.0_avgRT

The average raw response latency excluding outliers of 3.0 Z or higher.

Distribution

Distribution of values for Z3.0_avgRT

Distribution of values for Z3.0_avgRT

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
Z3.0_avgRT The average raw response latency excluding outliers of 3.0 Z or higher. numeric 0 1 383 844 1455 873.9104 175.4686 ▁▇▇▃▁

Z3.0_avgZ_RT

The average Z-scored response latency excluding outliers of 3.0 Z or higher.

Distribution

Distribution of values for Z3.0_avgZ_RT

Distribution of values for Z3.0_avgZ_RT

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
Z3.0_avgZ_RT The average Z-scored response latency excluding outliers of 3.0 Z or higher. numeric 0 1 -1.4 -0.12 1.7 -0.0449096 0.4814571 ▁▇▆▂▁

Z3.0_samplesize

The number of data points viewed that were not Z equal 3.0 or higher

Distribution

Distribution of values for Z3.0_samplesize

Distribution of values for Z3.0_samplesize

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
Z3.0_samplesize The number of data points viewed that were not Z equal 3.0 or higher numeric 0 1 1 123 364 123.5933 25.04458 ▁▇▁▁▁

Z3.0_n_answered

The number of data points viewed that were not Z equal 3.0 or higher. Note that viewed and answered are the same because you had to answer the item (correctly) for it to be included in the Z-score calculation.

Distribution

Distribution of values for Z3.0_n_answered

Distribution of values for Z3.0_n_answered

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
Z3.0_n_answered The number of data points viewed that were not Z equal 3.0 or higher. Note that viewed and answered are the same because you had to answer the item (correctly) for it be to included in the Z-score calculation. numeric 0 1 1 123 364 123.5933 25.04458 ▁▇▁▁▁

Z3.0_seRT

Standard error of the raw response latency after removing Z equal 3.0 or higher.

Distribution

Distribution of values for Z3.0_seRT

Distribution of values for Z3.0_seRT

1 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
Z3.0_seRT Standard error of the raw response latency after removing Z equal 3.0 or higher. numeric 1 0.9997447 0.84 28 143 29.9302 10.00673 ▇▆▁▁▁

Z3.0_seZ_RT

Standard error of the Z-scored response latency after removing Z equal 3.0 or higher.

Distribution

Distribution of values for Z3.0_seZ_RT

Distribution of values for Z3.0_seZ_RT

1 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
Z3.0_seZ_RT Standard error of the Z-scored response latency after removing Z equal 3.0 or higher. numeric 1 0.9997447 0.028 0.061 0.35 0.0640459 0.0178888 ▇▁▁▁▁

Z3.0_accuracy

The accuracy for Z scored response latencies after excluding Z of 3.0 or higher. Note that you had to get the item right for it to be included in the Z-score calculation, so this value is always 1.

Distribution

Distribution of values for Z3.0_accuracy

Distribution of values for Z3.0_accuracy

0 missing values.

Summary statistics

name label data_type n_missing complete_rate min median max mean sd hist
Z3.0_accuracy The accuracy for Z scored response latencies after excluding Z greater than 3.0 and higher. Note that you had to get the item right for it to be included in the Z-score calculation, so this value is always 1. numeric 0 1 1 1 1 1 0 ▁▁▇▁▁

Missingness report

Codebook table

JSON-LD metadata

The following JSON-LD can be found by search engines, if you share this codebook publicly on the web.

{
  "name": "Semantic Priming Across Many Languages (Example using Serbian data)",
  "description": "This dataset includes the summarized item data from the SPAML project (example is specifically Serbian, but all files are structured the same way). The data is averaged over items, regardless of condition for words (i.e., related or unrelated trial). \n\nSemantic priming has been studied for nearly 50 years across various experimental manipulations and theoretical frameworks. These studies provide insight into the cognitive underpinnings of semantic representations in both healthy and clinical populations; however, they have suffered from several issues including generally low sample sizes and a lack of diversity in linguistic implementations. Here, we will test the size and the variability of the semantic priming effect across ten languages by creating a large database of semantic priming values, based on an adaptive sampling procedure. Differences in response latencies between related word-pair conditions and unrelated word-pair conditions (i.e., difference score confidence interval is greater than zero) will allow quantifying evidence for semantic priming, whereas improvements in model fit with the addition of a random intercept for language will provide support for variability in semantic priming across languages.\n\n\n## Table of variables\nThis table contains variable names, labels, and number of missing values.\nSee the complete codebook for more.\n\n[truncated]\n\n### Note\nThis dataset was automatically described using the [codebook R package](https://rubenarslan.github.io/codebook/) (version 0.9.2).",
  "identifier": "https://doi.org/10.5281/zenodo.10888833",
  "creator": "Erin M. Buchanan",
  "citation": "Buchanan, E., Cuccolo, K., Heyman, T., Iyer, A., Coles, N., Lewis Jr, N., Peters, K., van Berkel, N., Taylor, J., Van't Veer, A. E., Montefinese, M., Valentine, K. D., Maxwell, N., Türkan, B. N., Williams, G., Oliveros-Chacana, J. C., Röer, J., Fini, C., Acar, O., … Lewis, S. C. (2024). SemanticPriming/SPAML: SPAML v1 Data Release (v1.0.0) [Data set]. Zenodo. https://doi.org/10.5281/zenodo.10888833",
  "url": "https://github.com/SemanticPriming/SPAML/releases/",
  "datePublished": "2024-05-01",
  "temporalCoverage": "2022-2024",
  "spatialCoverage": "Online",
  "keywords": ["word", "class", "avgRT", "avgZ_RT", "samplesize", "n_answered", "seRT", "seZ_RT", "accuracy", "Z2.5_avgRT", "Z2.5_avgZ_RT", "Z2.5_samplesize", "Z2.5_n_answered", "Z2.5_seRT", "Z2.5_seZ_RT", "Z2.5_accuracy", "Z3.0_avgRT", "Z3.0_avgZ_RT", "Z3.0_samplesize", "Z3.0_n_answered", "Z3.0_seRT", "Z3.0_seZ_RT", "Z3.0_accuracy"],
  "@context": "http://schema.org/",
  "@type": "Dataset",
  "variableMeasured": [
    {
      "name": "word",
      "description": "Original stimulus showed to the participant in the language of the study.",
      "@type": "propertyValue"
    },
    {
      "name": "class",
      "description": "Word/nonword trial type indicator, which tells you what the correct answer is for the trial.",
      "@type": "propertyValue"
    },
    {
      "name": "avgRT",
      "description": "Average response latency for the stimulus item across all participants.",
      "@type": "propertyValue"
    },
    {
      "name": "avgZ_RT",
      "description": "Average z-scored response latency for the stimulus item - z-scored by participant session, then averaged across the item, regardless of condition.",
      "@type": "propertyValue"
    },
    {
      "name": "samplesize",
      "description": "The number of participants who were shown that item.",
      "@type": "propertyValue"
    },
    {
      "name": "n_answered",
      "description": "The number of participants who answered that item (i.e., it did not time out.",
      "@type": "propertyValue"
    },
    {
      "name": "seRT",
      "description": "The standard error of the raw response latency.",
      "@type": "propertyValue"
    },
    {
      "name": "seZ_RT",
      "description": "The standard error of the z-scored response latency.",
      "@type": "propertyValue"
    },
    {
      "name": "accuracy",
      "description": "The proportion of correct answers out of total answered for that item.",
      "@type": "propertyValue"
    },
    {
      "name": "Z2.5_avgRT",
      "description": "The average raw response latency excluding outliers of 2.5 Z or higher.",
      "@type": "propertyValue"
    },
    {
      "name": "Z2.5_avgZ_RT",
      "description": "The average Z-scored response latency excluding outliers of 2.5 Z or higher.",
      "@type": "propertyValue"
    },
    {
      "name": "Z2.5_samplesize",
      "description": "The number of data points viewed that were not Z equal 2.5 or higher",
      "@type": "propertyValue"
    },
    {
      "name": "Z2.5_n_answered",
      "description": "The number of data points viewed that were not Z equal 2.5 or higher. Note that viewed and answered are the same because you had to answer the item (correctly) for it be to included in the Z-score calculation.",
      "@type": "propertyValue"
    },
    {
      "name": "Z2.5_seRT",
      "description": "Standard error of the raw response latency after removing Z equal 2.5 or higher.",
      "@type": "propertyValue"
    },
    {
      "name": "Z2.5_seZ_RT",
      "description": "Standard error of the Z-scored response latency after removing Z equal 2.5 or higher.",
      "@type": "propertyValue"
    },
    {
      "name": "Z2.5_accuracy",
      "description": "The accuracy for Z scored response latencies after excluding Z greater than 2.5 and higher. Note that you had to get the item right for it to be included in the Z-score calculation, so this value is always 1.",
      "@type": "propertyValue"
    },
    {
      "name": "Z3.0_avgRT",
      "description": "The average raw response latency excluding outliers of 3.0 Z or higher.",
      "@type": "propertyValue"
    },
    {
      "name": "Z3.0_avgZ_RT",
      "description": "The average Z-scored response latency excluding outliers of 3.0 Z or higher.",
      "@type": "propertyValue"
    },
    {
      "name": "Z3.0_samplesize",
      "description": "The number of data points viewed that were not Z equal 3.0 or higher",
      "@type": "propertyValue"
    },
    {
      "name": "Z3.0_n_answered",
      "description": "The number of data points viewed that were not Z equal 3.0 or higher. Note that viewed and answered are the same because you had to answer the item (correctly) for it be to included in the Z-score calculation.",
      "@type": "propertyValue"
    },
    {
      "name": "Z3.0_seRT",
      "description": "Standard error of the raw response latency after removing Z equal 3.0 or higher.",
      "@type": "propertyValue"
    },
    {
      "name": "Z3.0_seZ_RT",
      "description": "Standard error of the Z-scored response latency after removing Z equal 3.0 or higher.",
      "@type": "propertyValue"
    },
    {
      "name": "Z3.0_accuracy",
      "description": "The accuracy for Z scored response latencies after excluding Z greater than 3.0 and higher. Note that you had to get the item right for it to be included in the Z-score calculation, so this value is always 1.",
      "@type": "propertyValue"
    }
  ]
}